In [1]:
import os,sys

import pandas as pd
import numpy as np
import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout
import keras.utils
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
import random
Using TensorFlow backend.

IMPORTANT

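The cell below loads a random ~70% sample of the rows. Remove the `skiprows` argument (or, if you use the commented-out alternative, the `nrows` argument) to train on the full dataset.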

In [2]:
# Alternative for quick smoke tests: read only the first 2000 rows.
# df = pd.read_csv("../data/exT.csv",low_memory=False, nrows=2000)

# Load a random ~70% sample of the data rows. Row 0 (the header) is always
# kept; any other row is skipped when its draw exceeds 0.70. A dedicated,
# seeded generator makes the sample identical on every Restart & Run All,
# instead of depending on the state of the global `random` module.
row_sampler = random.Random(42)
df = pd.read_csv("../data/exT.csv", skiprows=lambda i: i > 0 and row_sampler.random() > 0.70)

print(df.shape)
df.head()
(11152, 19103)
Out[2]:
Unnamed: 0 RAB4B TIGAR RNF44 DNAH3 RPL23A ARL8B CALB2 MFSD3 PIGV ... SLCO4C1 ARHGAP21 DDX47 POMZP3 SDS TENM1 CYP4F2 PPP6R1 BATF3 OR8D4
0 Liver.1 2.367912 0.698684 3.931116 0.000000 116.288461 13.839265 0.021383 14.722173 5.437620 ... 0.000000 1.199607 0.101155 1.428458 2.471339 0.042357 44.610061 6.394772 0.281878 0.0
1 Prostate.1 5.592506 2.953563 7.065690 0.012499 325.521932 22.144552 0.356845 28.899361 6.717593 ... 0.030674 2.062158 0.185503 8.414909 2.690346 0.688731 0.260365 9.792451 1.671371 0.0
2 Leukemia.1 31.694000 2.534200 4.407400 13.057300 5.367900 2.775700 0.000000 9.161600 3.239400 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
3 Bladder.1 3.341472 5.029494 6.565113 0.080729 150.894571 20.842489 0.050655 12.871097 8.391810 ... 0.396229 15.934226 1.086867 6.501145 0.480683 0.010034 0.027455 31.207578 0.174882 0.0
4 Sarcoma.1 19.799300 2.672300 4.944100 4.778500 3.003800 4.543300 0.009800 13.796900 12.044100 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0

5 rows × 19103 columns

In [3]:
# df.info(verbose = False)
In [4]:
# The first column was loaded under the placeholder name 'Unnamed: 0';
# give it a meaningful name.
df = df.rename({'Unnamed: 0': 'labels'}, axis='columns')
df.head()
Out[4]:
labels RAB4B TIGAR RNF44 DNAH3 RPL23A ARL8B CALB2 MFSD3 PIGV ... SLCO4C1 ARHGAP21 DDX47 POMZP3 SDS TENM1 CYP4F2 PPP6R1 BATF3 OR8D4
0 Liver.1 2.367912 0.698684 3.931116 0.000000 116.288461 13.839265 0.021383 14.722173 5.437620 ... 0.000000 1.199607 0.101155 1.428458 2.471339 0.042357 44.610061 6.394772 0.281878 0.0
1 Prostate.1 5.592506 2.953563 7.065690 0.012499 325.521932 22.144552 0.356845 28.899361 6.717593 ... 0.030674 2.062158 0.185503 8.414909 2.690346 0.688731 0.260365 9.792451 1.671371 0.0
2 Leukemia.1 31.694000 2.534200 4.407400 13.057300 5.367900 2.775700 0.000000 9.161600 3.239400 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
3 Bladder.1 3.341472 5.029494 6.565113 0.080729 150.894571 20.842489 0.050655 12.871097 8.391810 ... 0.396229 15.934226 1.086867 6.501145 0.480683 0.010034 0.027455 31.207578 0.174882 0.0
4 Sarcoma.1 19.799300 2.672300 4.944100 4.778500 3.003800 4.543300 0.009800 13.796900 12.044100 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0

5 rows × 19103 columns

In [5]:
# df.describe() # not so helpful because of 19k features
In [6]:
# df.isna().sum().sum()/ len(df) * 100 # check for NaNs, If any..
In [7]:
# Sample ids look like "<tissue>.<n>"; keep only the text before the first dot.
df['Y'] = df['labels'].map(lambda sample_id: sample_id.partition(".")[0])
df['Y'].head()
Out[7]:
0       Liver
1    Prostate
2    Leukemia
3     Bladder
4     Sarcoma
Name: Y, dtype: object
In [8]:
# Horizontal bar chart of the number of samples per label.
df['Y'].value_counts().plot.barh(figsize=(10, 6))
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f7c9c6f9908>

It looks like the dataset is extremely imbalanced. We might have to do something about that.

I tried to visualize these values as well. Figure below shows the boxplots for two of these features.

In [9]:
# import seaborn as sns
# sns.boxplot(df['TIGAR'], whis= 3)
# plt.xlim(0, 250)
In [10]:
# import seaborn as sns
# sns.boxplot(df['RAB4B'], whis= 3)
# plt.xlim(0, 250)

There are some outliers, but it is not clear whether we should remove them to make the classifier more robust or keep the data as it is, since this is gene data. Since we are mainly concerned with the classification itself, I'd say we can get away with leaving them in, as the model performed satisfactorily.

Label Encoding the categorical variable Y

In [11]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Learn the label -> integer mapping first, then apply it to the same column.
label_encoder = LabelEncoder().fit(df['Y'])
df['Y'] = label_encoder.transform(df['Y'])
df['Y'].head()
Out[11]:
0    13
1    24
2    12
3     2
4    26
Name: Y, dtype: int64
In [12]:
# Number of distinct classes, followed by the class names themselves.
print(label_encoder.classes_.shape[0])
label_encoder.classes_
34
Out[12]:
array(['Adrenal', 'Bile', 'Bladder', 'Bone', 'Brain', 'Breast', 'Cervix',
       'Colon', 'Esophagus', 'Fallopian', 'Head', 'Kidney', 'Leukemia',
       'Liver', 'Lung', 'Lymph', 'Mediastinum', 'Nervous', 'Ocular',
       'Ovarian', 'Pancreas', 'Pelvis', 'Peritoneum', 'Pleura',
       'Prostate', 'Rectum', 'Sarcoma', 'Skin', 'Stomach', 'Testis',
       'Thymus', 'Thyroid', 'Uterus', 'none'], dtype=object)
In [13]:
# Detach the id column and the encoded target from df (pop removes them in place).
labels, Y = df.pop("labels"), df.pop("Y")
In [14]:
# Preview the first integer-encoded targets.
Y.head(n=5)
Out[14]:
0    13
1    24
2    12
3     2
4    26
Name: Y, dtype: int64

Converting Y into 1-hot

In [15]:
# One-hot encode the integer targets. The width is pinned to the number of
# classes the encoder learned instead of being inferred from max(Y) + 1.
n_classes = len(label_encoder.classes_)
Y = keras.utils.to_categorical(Y, num_classes=n_classes)
# Guards against a wrong width and against re-running this cell on an
# already one-hot encoded Y (which would add a third dimension).
assert Y.shape == (len(df), n_classes), f"unexpected one-hot shape {Y.shape}"
Y
Out[15]:
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)
In [16]:
# df now serves as X.
df.head(n=5)
Out[16]:
RAB4B TIGAR RNF44 DNAH3 RPL23A ARL8B CALB2 MFSD3 PIGV ZNF708 ... SLCO4C1 ARHGAP21 DDX47 POMZP3 SDS TENM1 CYP4F2 PPP6R1 BATF3 OR8D4
0 2.367912 0.698684 3.931116 0.000000 116.288461 13.839265 0.021383 14.722173 5.437620 0.124913 ... 0.000000 1.199607 0.101155 1.428458 2.471339 0.042357 44.610061 6.394772 0.281878 0.0
1 5.592506 2.953563 7.065690 0.012499 325.521932 22.144552 0.356845 28.899361 6.717593 1.759132 ... 0.030674 2.062158 0.185503 8.414909 2.690346 0.688731 0.260365 9.792451 1.671371 0.0
2 31.694000 2.534200 4.407400 13.057300 5.367900 2.775700 0.000000 9.161600 3.239400 0.628100 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0
3 3.341472 5.029494 6.565113 0.080729 150.894571 20.842489 0.050655 12.871097 8.391810 0.130546 ... 0.396229 15.934226 1.086867 6.501145 0.480683 0.010034 0.027455 31.207578 0.174882 0.0
4 19.799300 2.672300 4.944100 4.778500 3.003800 4.543300 0.009800 13.796900 12.044100 0.332600 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.0

5 rows × 19102 columns

Train-Test Split (Stratified and shuffled)

In [17]:
# Hold out 20% of the samples, shuffled and stratified on the one-hot targets.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    Y,
    test_size=0.2,
    shuffle=True,
    stratify=Y,
    random_state=42,
)
tuple(part.shape for part in (X_train, X_test, y_train, y_test))
Out[17]:
((8921, 19102), (2231, 19102), (8921, 34), (2231, 34))

A MemoryError occurred while standardizing this data, so the following steps are taken:

  • release the df variable from memory
  • save a backup of the split data
In [ ]:
# Variables in memory that are hogging more than 1 MB.
def report_large_variables(namespace, threshold_mb=1.0):
    """Print and return the objects in `namespace` larger than `threshold_mb`.

    Sizes come from sys.getsizeof and are converted with a single unit
    (1 MB = 1024 * 1024 bytes) for both the threshold test and the printed
    value, so the two can no longer disagree.

    Parameters
    ----------
    namespace : mapping of variable name -> object (e.g. globals()).
    threshold_mb : only objects strictly larger than this many MB are reported.

    Returns
    -------
    list of (name, size_in_mb) tuples, in namespace iteration order.
    """
    bytes_per_mb = 1024 ** 2
    large = []
    # Snapshot the items first: the namespace may change while we iterate.
    for name, obj in list(namespace.items()):
        size_mb = sys.getsizeof(obj) / bytes_per_mb
        if size_mb > threshold_mb:
            print(name, size_mb, "MB")
            large.append((name, size_mb))
    return large


large_variables = report_large_variables(globals())

# Output recorded from an earlier run:
# df 2439707.544 KB  # Even though this was only ~2.4 GB, it occasionally caused memory issues.
# labels 1075.379 KB
# X_train 1951868.152 KB
# X_test 487967.056 KB
# y_train 1737.104 KB
In [ ]:
# %reset_selective -f "^df$"  # Releasing memory of df variable

Feature Scaling

In [18]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same fitted
# transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_test.shape
Out[18]:
(2231, 19102)
In [ ]:
# Path of the .npz archive used to back up the scaled, split data.
filename = "../data/load/5k_lime/7scaled_splitted_data_lime.npz"
# filename = "../data/splitted_data_X_train.npz"
In [ ]:
# Create the target directory up front so the save cannot fail on a missing path.
os.makedirs(os.path.dirname(filename) or ".", exist_ok=True)

# Store the names as unicode string arrays rather than object arrays, so the
# archive can be read back with np.load without allow_pickle=True.
np.savez_compressed(filename,
                    X_train=X_train, X_test=X_test,
                    y_train=y_train, y_test=y_test,
                    feature_names=np.asarray(df.columns, dtype=str),
                    class_names=np.asarray(label_encoder.classes_, dtype=str)
                   )
print(f"{filename} data saved..")

Load the split variables for the next step. Just run the cells below to load them and start experimenting later.

In [ ]:
# a = np.load(filename)
In [ ]:
# X_train, X_test, y_train, y_test = a["X_train"], a["X_test"], a["y_train"], a["y_test"]
# Shapes of the four arrays, in (X_train, X_test, y_train, y_test) order.
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

Model Definition

In [30]:
# Reset the session through the same `keras` package that Sequential and Dense
# are imported from, rather than through the separate tf.keras namespace.
keras.backend.clear_session() # reset keras session
In [31]:
# Fully connected classifier: three ReLU hidden layers (1024 -> 512 -> 128)
# followed by a softmax layer with one unit per column of y_train.
model = Sequential([
    Dense(1024, activation="relu", input_dim=X_train.shape[1]),
    Dense(512, activation="relu"),
    Dense(128, activation="relu"),
    Dense(y_train.shape[1], activation="softmax"),
])

model.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 1024)              19561472  
_________________________________________________________________
dense_2 (Dense)              (None, 512)               524800    
_________________________________________________________________
dense_3 (Dense)              (None, 128)               65664     
_________________________________________________________________
dense_4 (Dense)              (None, 34)                4386      
=================================================================
Total params: 20,156,322
Trainable params: 20,156,322
Non-trainable params: 0
_________________________________________________________________

Training

In [32]:
checkpoint_path = "out/lime/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Create the checkpoint directory up front so that saving the first
# checkpoint cannot fail on a missing path.
os.makedirs(checkpoint_dir, exist_ok=True)

# Create a callback that saves the model's weights whenever the monitored
# validation accuracy improves.
cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                              save_weights_only=True,
                                              save_best_only=True,
                                              monitor='val_accuracy',
                                              verbose=1)

# Stop after 10 epochs without improvement and roll back to the best weights.
# Taken from `keras.callbacks` like the checkpoint callback above, so both
# callbacks come from the same package as the model they are attached to.
es_callback = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
In [33]:
# Categorical cross-entropy loss with Adam; the loss is also tracked as a
# metric alongside accuracy.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=['categorical_crossentropy', 'accuracy'],
)
In [34]:
# Train for up to 20 epochs; (X_test, y_test) is passed as the validation data
# that the callbacks observe.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    shuffle=True,
    verbose=1,
    validation_data=(X_test, y_test),
    callbacks=[es_callback, cp_callback],
)
Train on 8921 samples, validate on 2231 samples
Epoch 1/20
8921/8921 [==============================] - 15s 2ms/step - loss: 1.0705 - categorical_crossentropy: 1.0705 - accuracy: 0.8366 - val_loss: 0.5809 - val_categorical_crossentropy: 0.5809 - val_accuracy: 0.8754

Epoch 00001: val_accuracy improved from -inf to 0.87539, saving model to out/lime/cp.ckpt
Epoch 2/20
8921/8921 [==============================] - 12s 1ms/step - loss: 0.4033 - categorical_crossentropy: 0.4033 - accuracy: 0.9105 - val_loss: 0.5177 - val_categorical_crossentropy: 0.5177 - val_accuracy: 0.9171

Epoch 00002: val_accuracy improved from 0.87539 to 0.91708, saving model to out/lime/cp.ckpt
Epoch 3/20
8921/8921 [==============================] - 11s 1ms/step - loss: 0.2596 - categorical_crossentropy: 0.2596 - accuracy: 0.9357 - val_loss: 0.4886 - val_categorical_crossentropy: 0.4886 - val_accuracy: 0.9242

Epoch 00003: val_accuracy improved from 0.91708 to 0.92425, saving model to out/lime/cp.ckpt
Epoch 4/20
8921/8921 [==============================] - 11s 1ms/step - loss: 0.1993 - categorical_crossentropy: 0.1993 - accuracy: 0.9469 - val_loss: 0.4732 - val_categorical_crossentropy: 0.4732 - val_accuracy: 0.9283

Epoch 00004: val_accuracy improved from 0.92425 to 0.92828, saving model to out/lime/cp.ckpt
Epoch 5/20
8921/8921 [==============================] - 16s 2ms/step - loss: 0.3296 - categorical_crossentropy: 0.3296 - accuracy: 0.9274 - val_loss: 0.8980 - val_categorical_crossentropy: 0.8980 - val_accuracy: 0.9095

Epoch 00005: val_accuracy did not improve from 0.92828
Epoch 6/20
8921/8921 [==============================] - 16s 2ms/step - loss: 0.2152 - categorical_crossentropy: 0.2152 - accuracy: 0.9488 - val_loss: 0.8692 - val_categorical_crossentropy: 0.8692 - val_accuracy: 0.9189

Epoch 00006: val_accuracy did not improve from 0.92828
Epoch 7/20
8921/8921 [==============================] - 17s 2ms/step - loss: 0.1565 - categorical_crossentropy: 0.1565 - accuracy: 0.9590 - val_loss: 0.8497 - val_categorical_crossentropy: 0.8497 - val_accuracy: 0.9274

Epoch 00007: val_accuracy did not improve from 0.92828
Epoch 8/20
8921/8921 [==============================] - 16s 2ms/step - loss: 0.1088 - categorical_crossentropy: 0.1088 - accuracy: 0.9703 - val_loss: 0.7074 - val_categorical_crossentropy: 0.7074 - val_accuracy: 0.9355

Epoch 00008: val_accuracy improved from 0.92828 to 0.93545, saving model to out/lime/cp.ckpt
Epoch 9/20
8921/8921 [==============================] - 17s 2ms/step - loss: 0.1070 - categorical_crossentropy: 0.1070 - accuracy: 0.9672 - val_loss: 0.9760 - val_categorical_crossentropy: 0.9760 - val_accuracy: 0.9350

Epoch 00009: val_accuracy did not improve from 0.93545
Epoch 10/20
8921/8921 [==============================] - 19s 2ms/step - loss: 0.0951 - categorical_crossentropy: 0.0951 - accuracy: 0.9734 - val_loss: 1.2120 - val_categorical_crossentropy: 1.2120 - val_accuracy: 0.9399

Epoch 00010: val_accuracy improved from 0.93545 to 0.93994, saving model to out/lime/cp.ckpt
Epoch 11/20
8921/8921 [==============================] - 18s 2ms/step - loss: 0.1598 - categorical_crossentropy: 0.1598 - accuracy: 0.9581 - val_loss: 1.5352 - val_categorical_crossentropy: 1.5352 - val_accuracy: 0.9305

Epoch 00011: val_accuracy did not improve from 0.93994
Epoch 12/20
8921/8921 [==============================] - 18s 2ms/step - loss: 0.1641 - categorical_crossentropy: 0.1641 - accuracy: 0.9617 - val_loss: 1.2325 - val_categorical_crossentropy: 1.2325 - val_accuracy: 0.9287

Epoch 00012: val_accuracy did not improve from 0.93994
Epoch 13/20
8921/8921 [==============================] - 17s 2ms/step - loss: 0.2101 - categorical_crossentropy: 0.2101 - accuracy: 0.9537 - val_loss: 1.4858 - val_categorical_crossentropy: 1.4858 - val_accuracy: 0.9072

Epoch 00013: val_accuracy did not improve from 0.93994
Epoch 14/20
8921/8921 [==============================] - 17s 2ms/step - loss: 0.1879 - categorical_crossentropy: 0.1879 - accuracy: 0.9559 - val_loss: 1.4809 - val_categorical_crossentropy: 1.4809 - val_accuracy: 0.9364

Epoch 00014: val_accuracy did not improve from 0.93994
Epoch 15/20
8921/8921 [==============================] - 19s 2ms/step - loss: 0.1779 - categorical_crossentropy: 0.1779 - accuracy: 0.9599 - val_loss: 2.8469 - val_categorical_crossentropy: 2.8469 - val_accuracy: 0.9301

Epoch 00015: val_accuracy did not improve from 0.93994
Epoch 16/20
8921/8921 [==============================] - 21s 2ms/step - loss: 0.1194 - categorical_crossentropy: 0.1194 - accuracy: 0.9714 - val_loss: 2.7045 - val_categorical_crossentropy: 2.7045 - val_accuracy: 0.9184

Epoch 00016: val_accuracy did not improve from 0.93994
Epoch 17/20
8921/8921 [==============================] - 23s 3ms/step - loss: 0.2070 - categorical_crossentropy: 0.2070 - accuracy: 0.9573 - val_loss: 1.8395 - val_categorical_crossentropy: 1.8395 - val_accuracy: 0.9314

Epoch 00017: val_accuracy did not improve from 0.93994
Epoch 18/20
8921/8921 [==============================] - 20s 2ms/step - loss: 0.1138 - categorical_crossentropy: 0.1138 - accuracy: 0.9698 - val_loss: 1.3370 - val_categorical_crossentropy: 1.3370 - val_accuracy: 0.9283

Epoch 00018: val_accuracy did not improve from 0.93994
Epoch 19/20
8921/8921 [==============================] - 17s 2ms/step - loss: 0.1248 - categorical_crossentropy: 0.1248 - accuracy: 0.9697 - val_loss: 1.6651 - val_categorical_crossentropy: 1.6651 - val_accuracy: 0.9292

Epoch 00019: val_accuracy did not improve from 0.93994
Epoch 20/20
8921/8921 [==============================] - 19s 2ms/step - loss: 0.1041 - categorical_crossentropy: 0.1041 - accuracy: 0.9753 - val_loss: 1.4932 - val_categorical_crossentropy: 1.4932 - val_accuracy: 0.9323

Epoch 00020: val_accuracy did not improve from 0.93994

Evaluating with Validation set

In [ ]:
# Turn predicted probabilities and one-hot targets into integer class ids.
predicted_valid_labels = np.argmax(model.predict(X_test), axis=1)
valid_labels = np.argmax(y_test, axis=1)

# Spot-check the first ten samples.
test_range = range(10)
print("Predicted labels: ", predicted_valid_labels[test_range])
print("True labels: ", valid_labels[test_range])

real = label_encoder.inverse_transform(valid_labels[test_range])
predicts = label_encoder.inverse_transform(predicted_valid_labels[test_range])
# Pair the names positionally. A dict keyed by the true label keeps only one
# entry per class and silently drops every other sample sharing that label.
print("real:preds\n", list(zip(real, predicts)))
In [ ]:
# Visualization of Confusion Matrix
import seaborn as sns

# Take the class names from the encoder instead of a hand-copied list, and
# force the matrix to contain a row/column for every class. Without `labels=`
# the matrix only covers classes present in this split, so it can come out
# smaller than the tick list and mislabel (or fail to label) the axes.
tick_marks = list(label_encoder.classes_)
cm = metrics.confusion_matrix(valid_labels, predicted_valid_labels,
                              labels=np.arange(len(tick_marks)))
# print(cm)
# Row-normalise; a class with no true samples keeps a row of zeros instead of
# producing NaNs from a division by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(cm, row_totals,
                          out=np.zeros(cm.shape, dtype=float),
                          where=row_totals != 0)

plt.figure(figsize=(20,20))
sns.heatmap(cm_normalized, annot=True, fmt=".4f", linewidths=.5, square = True, cmap = 'summer')
plt.xlabel('Predicted Values', size=20)
plt.ylabel('Actual Values', size=20)

# One tick per class, matching the matrix dimensions by construction.
ticks = np.arange(len(tick_marks))

plt.xticks(ticks+0.5 ,tick_marks, rotation=90, size=12) #add 0.5 to ticks to position it at center
plt.yticks(ticks+0.5 ,tick_marks, rotation=0, size=12)
# all_sample_title = 'Accuracy Score: {:.4f}'.format(93) # hardcoded this from training logs for now :D
# plt.title(all_sample_title, size = 30)
plt.show()

Lime Explainer

In [35]:
import lime
import lime.lime_tabular
import time

The explainer computes statistics on each feature (column). If the feature is numerical, it computes the mean and std, and discretizes the feature into quartiles.

In [36]:
# %%prun
# Time the construction of the tabular explainer on the scaled training data.
start = time.time()
explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=X_train,
    class_names=label_encoder.classes_,
    feature_names=df.columns,
)
print("Elapsed time:", time.time() - start)
Elapsed time: 74.12125563621521
In [37]:
# Explain five randomly chosen test samples and time each explanation.
for _ in range(5):
    start = time.time()
    ith = np.random.randint(0, X_test.shape[0])
    print(ith,"th test sample")
    # The model ends in a softmax layer, so model.predict already returns the
    # class probabilities LIME needs; predict_proba is not available in every
    # Keras version.
    exp = explainer.explain_instance(X_test[ith], model.predict, num_features=20, top_labels=5)
    
    exp.show_in_notebook(show_table=True, show_all=True)
    
    for i in exp.available_labels():
        # The previous message had a Python expression stuck inside the string
        # literal, so it was printed verbatim instead of being evaluated.
        print(i,"th class: ", label_encoder.classes_[i])
    #     display(pd.DataFrame(exp.as_list(label=i)))
        display(exp.as_list(label=i))
    print("Iteration Elapsed time:", time.time() - start)
961 th test sample
27 th class:  Skin
[('FABP4 > -0.10', 0.004900622583027054),
 ('ARAP3 > 0.39', 0.004279639427193811),
 ('-0.05 < WWP2 <= -0.05', 0.004135313904076373),
 ('-0.32 < REC8 <= -0.12', 0.0040643931026405275),
 ('RBMS3 > 0.14', 0.0039481294529416),
 ('SSRP1 > 0.60', 0.0039290233051116445),
 ('-0.09 < TTL <= 0.48', 0.00379172554822767),
 ('RXRB > 0.64', 0.0037777796873132417),
 ('GNA14 <= -0.49', 0.0037260069729724787),
 ('IGLJ1 > -0.07', 0.003628195614552136),
 ('TGM4 <= -0.04', 0.00336204635185952),
 ('-0.36 < PPIAL4A <= -0.28', 0.0033177470114618937),
 ('0.05 < C1D <= 0.51', 0.003168614710300418),
 ('ADCK1 > 0.32', 0.003094354620244731),
 ('OR6K6 > -0.15', 0.003089991386578528),
 ('0.10 < SRP54 <= 0.53', 0.0029566344595565006),
 ('DKK4 <= -0.05', 0.0028183794916554514),
 ('RABEP1 > 0.14', 0.0027699658993313113),
 ('-0.15 < SMC6 <= 0.43', 0.0026992432599874747),
 ('-0.23 < CISD3 <= 0.27', -0.000627824746190579)]
18 th class:  Ocular
[('-0.31 < GPR37 <= -0.27', -0.002543873649998079),
 ('-0.31 < PTGES <= -0.03', 0.002321456961955612),
 ('-0.23 < JPH3 <= -0.22', 0.002203079179036394),
 ('0.22 < IK <= 0.70', -0.0020228052839010298),
 ('RPL9 > 0.32', -0.001986371776084894),
 ('AC009060.2 <= -0.20', -0.0018878475122103265),
 ('-0.59 < ZNF429 <= -0.23', -0.001848470115372621),
 ('KCNC1 > -0.19', 0.0018075074353088637),
 ('ABO <= -0.52', 0.0017688171227666447),
 ('-0.11 < TMEM134 <= 0.49', -0.0016886326981547812),
 ('-1.06 < SF3B5 <= -0.02', -0.001677832478023872),
 ('-0.12 < TRPM3 <= -0.09', 0.0014658072305095001),
 ('-0.28 < KCNH8 <= -0.26', 0.0014574671231296446),
 ('-0.09 < TAC4 <= -0.02', 0.0013576975912715852),
 ('TXNRD2 > 0.31', 0.0013493620339779671),
 ('ZFAND5 > 0.48', 0.0013440409667207346),
 ('-1.12 < GGPS1 <= 0.03', -0.0012612739197187545),
 ('-0.68 < TRIM22 <= -0.26', 0.0012499962263292867),
 ('-0.27 < POMK <= 0.07', 0.0011972695307478072),
 ('-0.54 < HSPA1B <= -0.21', -0.001142780933613765)]
4 th class:  Brain
[('UBE2G2 > 0.58', -0.015869454548144785),
 ('AC124312.1 <= -0.28', -0.015864294913889798),
 ('AASS > 0.21', 0.015856965434802098),
 ('ARHGEF11 > 0.45', 0.015269103226793895),
 ('OR56A3 <= -0.19', 0.01501818605660824),
 ('KANSL3 > 0.62', 0.014620076269434054),
 ('0.01 < DPP9 <= 0.62', 0.014233468530370219),
 ('RBMS3 > 0.14', -0.01397311659532302),
 ('AFAP1L2 <= -0.49', 0.013513291699169094),
 ('-0.27 < PCDHA6 <= -0.15', -0.013282032724620925),
 ('-0.16 < ZNF121 <= 0.36', -0.012237939967118536),
 ('-0.69 < ETS2 <= -0.33', -0.012130249143554747),
 ('-0.32 < HIRA <= 0.01', 0.011824893420593554),
 ('-0.15 < CTCFL <= -0.15', 0.011518207937759914),
 ('SLC26A1 > 0.13', -0.011436139537457993),
 ('AFM <= -0.19', 0.01137261851142505),
 ('KCNK2 > -0.12', 0.011298751348994069),
 ('-0.14 < FAM156A <= 0.01', 0.009490391822629276),
 ('-1.15 < ZNF341 <= 0.02', -0.008914076091869871),
 ('-0.88 < ZNF605 <= -0.17', 0.0076811589752568245)]
11 th class:  Kidney
[('HSPA8 > 0.67', 0.00293255750640923),
 ('UTP14A > 0.41', 0.002890785223609122),
 ('PRKCH > 0.57', 0.002858068907270017),
 ('-0.68 < KIF7 <= -0.31', 0.0026708699175098777),
 ('KRTAP20-1 <= -0.27', -0.002626737057466836),
 ('-0.06 < YARS2 <= 0.48', 0.002601510938333852),
 ('-0.63 < MRPL22 <= -0.24', -0.002592393302262348),
 ('TRIM42 <= -0.17', 0.002571488375846417),
 ('0.10 < SF1 <= 0.56', -0.00256526797708693),
 ('0.14 < HNRNPUL1 <= 0.63', 0.0023798296116239307),
 ('-0.11 < SCG2 <= -0.10', 0.002347252616172293),
 ('DPRX <= -0.12', 0.0023435790178256304),
 ('0.13 < ANAPC13 <= 0.66', -0.0023244868582121777),
 ('-0.23 < TMEM141 <= 0.22', 0.0022322320205005247),
 ('-0.35 < HLA-DPA1 <= 0.17', -0.0021342044236600023),
 ('0.11 < UBQLN2 <= 0.67', 0.0021236039509878015),
 ('TRBV5-3 <= -0.43', 0.00190275691590532),
 ('-0.53 < GABRB3 <= -0.35', -0.0018548597903594662),
 ('-0.10 < SLURP1 <= -0.10', -0.0017971178607052167),
 ('-0.32 < TATDN1 <= 0.33', -0.0014649156236860063)]
22 th class:  Peritoneum
[('UTP14A > 0.41', -0.003991718890055177),
 ('-0.13 < COL2A1 <= -0.13', -0.003778360343972422),
 ('MAGED4 > -0.12', 0.003610367336799061),
 ('-0.24 < CENPO <= 0.48', -0.0034924985501444813),
 ('TLK1 <= -0.64', -0.003427262768107696),
 ('CT47A4 <= -0.32', 0.003420272278999226),
 ('TGFBRAP1 > 0.49', -0.003344623860414122),
 ('-0.36 < COLEC12 <= -0.04', 0.0032410495085578523),
 ('-0.40 < PAMR1 <= -0.29', 0.0031275708508768135),
 ('ADRA2C > -0.08', 0.0029973048304439407),
 ('-0.20 < SP140L <= 0.43', 0.002938751783632513),
 ('POLR1B > 0.58', -0.0027238498133049164),
 ('-0.14 < DHRS1 <= 0.42', 0.002682874100782724),
 ('TMSB15A > -0.02', -0.0025963711786696716),
 ('TRAJ13 > -0.13', -0.0025693584005852944),
 ('CST8 <= -0.26', -0.0024033266035346002),
 ('OR6K6 > -0.15', -0.0024016424491658923),
 ('-0.49 < YPEL4 <= -0.32', 0.0022834794984481744),
 ('-0.04 < ARFIP2 <= 0.42', -0.002061992050661188),
 ('-1.23 < MICU1 <= 0.09', -0.0018565424045134582)]
Iteration Elapsed time: 591.1822307109833
889 th test sample
7 th class:  Colon
[('DUSP26 > -0.19', 0.0031450944963655955),
 ('HEATR6 <= -0.55', 0.0028038184809809756),
 ('SUV39H1 > 0.46', 0.0025838287047985093),
 ('F12 > -0.11', 0.0025339972061663644),
 ('-0.60 < GLIS3 <= -0.42', 0.0025296859281788136),
 ('-1.07 < KIAA0355 <= -0.03', 0.0025158476669198807),
 ('-0.13 < OTUD3 <= -0.08', 0.0024586317950996226),
 ('FAM185A <= -0.45', 0.0024551475414007367),
 ('ARRB1 > 0.24', 0.002446475103027584),
 ('-0.44 < MPPED2 <= -0.37', 0.0024195099042446434),
 ('-0.22 < DLEC1 <= -0.06', 0.0023959034274244366),
 ('ZBTB7B > 0.48', 0.00238669886514745),
 ('HOXB7 > 0.34', 0.0023807638479010325),
 ('SLC47A1 <= -0.31', 0.002306329040678198),
 ('-0.42 < CYP4F3 <= -0.33', 0.0022106406508838735),
 ('-0.87 < RSRC1 <= -0.02', 0.00220212803930468),
 ('ANKRD39 > 0.09', 0.0021952256064419655),
 ('S100A6 > 0.18', 0.0021907375900598585),
 ('GOLGA6L4 <= -0.15', 0.001988892624124499),
 ('HNRNPCL4 <= -0.13', -0.0014408474011607295)]
20 th class:  Pancreas
[('-0.29 < SLC16A8 <= 0.05', 0.002410128602733243),
 ('PDXP > 0.14', -0.0023347533896750264),
 ('EDDM3B <= -0.04', -0.0022609650684903395),
 ('STPG1 <= -0.55', 0.0021263462313221865),
 ('PLPP6 > 0.26', 0.0020648476557886914),
 ('CCNF > 0.46', -0.0019733715005094904),
 ('-0.53 < BLM <= -0.31', -0.0019365416036667203),
 ('-0.12 < LDLRAP1 <= 0.54', -0.0019317095686778181),
 ('VIPAS39 <= -0.45', -0.0019277881466678566),
 ('MXRA7 <= -0.66', 0.0018778676032992193),
 ('IFT80 <= -0.58', 0.0018658166735707248),
 ('GXYLT1 <= -0.55', -0.0018416854083873758),
 ('ORMDL3 > 0.18', 0.001809295963564664),
 ('IER3 > 0.18', 0.001768884636200072),
 ('-0.57 < TSEN2 <= -0.25', -0.0017589479857330003),
 ('AK6 <= -0.52', -0.0015325346376925916),
 ('-0.11 < MTRNR2L8 <= -0.06', 0.0014738627935226404),
 ('CCDC185 <= -0.22', 0.001356201453945275),
 ('MS4A10 <= -0.04', -0.0012700393583562155),
 ('SPIC <= -0.07', 0.0011423030554545514)]
25 th class:  Rectum
[('DDI1 <= -0.11', -0.002801144548999072),
 ('HNRNPCL4 <= -0.13', -0.0024908396006287875),
 ('-0.85 < CCNL2 <= -0.21', -0.002429602968961289),
 ('-0.44 < VSTM4 <= -0.29', 0.0024088275488213864),
 ('PLOD3 > 0.12', -0.0023866625271896018),
 ('-0.73 < PKD2 <= -0.25', -0.0023002269851958488),
 ('KLHL18 <= -0.53', 0.0022940084765554584),
 ('TIMP1 > 0.19', -0.002175325084877016),
 ('IL5 <= -0.24', -0.001999633416535053),
 ('-0.61 < PCDHGB4 <= -0.58', -0.0019516945753338011),
 ('PLPP6 > 0.26', 0.0018835874438203617),
 ('-0.73 < KIAA1211L <= -0.34', -0.0018571569869606783),
 ('FLII > 0.62', 0.0018460919641150215),
 ('P4HA3 > 0.01', 0.0016612825631950375),
 ('-0.17 < FTCD <= -0.16', -0.001623769881930757),
 ('PRDM13 > -0.18', 0.001604692354104388),
 ('-1.12 < KDM3B <= 0.03', -0.0015490952257857867),
 ('-0.07 < PIH1D1 <= 0.47', 0.001488471730563014),
 ('METTL4 <= -0.55', -0.0013848927720800474),
 ('KRT27 <= -0.13', 0.0012697580286299723)]
2 th class:  Bladder
[('-0.34 < GPRIN1 <= 0.11', -0.0011795925507283248),
 ('AGMAT > -0.15', 0.0011264996444767083),
 ('U2AF1 <= -0.25', -0.0010689468853464664),
 ('-0.93 < MKRN1 <= 0.06', 0.0010352042736920712),
 ('SRD5A3 > 0.17', -0.0010011124355587624),
 ('CCDC84 <= -0.71', -0.0009946200974019744),
 ('HNF4G > -0.02', 0.000992084856395285),
 ('-0.47 < PKDREJ <= -0.22', -0.0009750849724360495),
 ('CDH12 > -0.13', 0.0009675377599703993),
 ('-1.05 < PPP3CC <= -0.01', -0.0009439667881353728),
 ('ZNHIT2 > 0.46', -0.00086718610258513),
 ('-1.24 < SERBP1 <= 0.12', -0.0008647389494369479),
 ('TMEM178A <= -0.59', -0.0007872784222919173),
 ('IL22RA1 > -0.07', 0.0007837705541869977),
 ('-0.66 < ANKRD27 <= -0.09', -0.0007792273230562665),
 ('ZNF488 > -0.12', 0.0007444966094231395),
 ('C5orf47 <= -0.23', -0.0007267802176590338),
 ('ANKRD7 <= -0.42', -0.000679420345803089),
 ('AC126407.1 <= -0.23', 0.0006393979176242397),
 ('-0.79 < DDX19A <= 0.12', 0.0006379912376898258)]
13 th class:  Liver
[('LYZL1 <= -0.17', 0.0015377638849941153),
 ('-0.34 < TRPC6 <= -0.24', 0.0013966019734884233),
 ('ACTL8 > -0.11', -0.0013609536290366147),
 ('UNC5B > 0.23', 0.0012800143762461674),
 ('-0.65 < PFN2 <= -0.32', 0.001269637985084905),
 ('ZNF488 > -0.12', 0.0012695753901926662),
 ('ATF4 > 0.61', 0.0012607443897665466),
 ('DCSTAMP <= -0.16', 0.0012047423719809588),
 ('PAK5 <= -0.22', 0.0011181539516468089),
 ('TMEM191C > 0.08', 0.0010952878104302235),
 ('CD52 > -0.05', 0.0010711355188089832),
 ('-0.59 < RAB31 <= -0.33', 0.0010604377371000425),
 ('CDKN2D > 0.14', 0.001027551355382055),
 ('-0.91 < FBXO22 <= 0.05', -0.0008944289177219653),
 ('PRDX5 > 0.40', -0.000893291316472247),
 ('-0.24 < ABR <= 0.28', 0.0008859263721095472),
 ('RDH8 <= -0.22', -0.0008462634329818468),
 ('ZNF469 > -0.04', 0.0008380490212910087),
 ('KRT74 <= -0.19', 0.0008182382217565229),
 ('-0.51 < TULP4 <= -0.16', 0.0008064452982176732)]
Iteration Elapsed time: 584.6042516231537
365 th test sample
12 th class:  Leukemia
[('-0.54 < DMC1 <= -0.30', 0.005395497402209731),
 ('-0.18 < FBXO40 <= -0.14', 0.0053123068684885096),
 ('-0.42 < TAL2 <= -0.38', 0.005040999634215693),
 ('-0.03 < ZNF66 <= -0.02', 0.004853897341319446),
 ('ENHO <= -0.26', 0.00474441366374024),
 ('FLT3 > 0.33', 0.004651780670813449),
 ('-0.41 < CCDC126 <= -0.14', 0.004612656686177119),
 ('-0.26 < RORB <= -0.24', 0.004559254110736093),
 ('MYOD1 <= -0.07', 0.0044565981646119334),
 ('GNA15 > 0.01', 0.004364585767672897),
 ('ABCG5 <= -0.17', 0.004337946913181365),
 ('ETNK1 <= -0.60', 0.004313837917219324),
 ('CXXC4 <= -0.45', 0.004181463024360652),
 ('TAS2R31 <= -0.24', 0.0038897660256658325),
 ('-0.69 < CACHD1 <= -0.35', 0.0038346696328935754),
 ('TRAJ11 <= -0.21', 0.0037661788239566935),
 ('-0.30 < PARD6A <= 0.26', 0.0035725580545152456),
 ('SFXN4 > 0.57', 0.0034562167925037447),
 ('OPLAH <= -0.67', 0.0033445760425543647),
 ('FSHR <= -0.13', 0.002927191971694383)]
5 th class:  Breast
[('LMTK2 <= -0.44', 0.003716126296822436),
 ('ARFGEF2 <= -0.67', 0.003685835564401128),
 ('IL13 > -0.11', -0.00357523142841079),
 ('IGF2BP1 <= -0.45', 0.003511731942088209),
 ('CLSTN1 <= -0.71', -0.0034575444259052172),
 ('OR2J3 <= -0.18', -0.0032060755823367392),
 ('ZFP42 <= -0.49', 0.0031410831007226575),
 ('ZDHHC19 > -0.06', 0.0031276357946530314),
 ('KRTAP10-6 <= -0.16', 0.0030861327572703863),
 ('KRT76 <= -0.03', -0.0029590899302658572),
 ('-1.10 < VPS51 <= -0.01', -0.002928988304344586),
 ('-0.65 < STAMBPL1 <= -0.32', 0.0026647615043492627),
 ('-0.15 < TM4SF20 <= -0.15', -0.002611707146584052),
 ('-0.91 < ZNF773 <= -0.08', 0.002533251846791267),
 ('COL20A1 <= -0.34', -0.002432450686064713),
 ('-0.29 < SMCO4 <= 0.39', 0.0023965745067837424),
 ('IBTK <= -0.73', -0.002325205428607095),
 ('PTX4 <= -0.12', 0.002240196672875333),
 ('TP53TG3D <= -0.24', -0.002012659317832175),
 ('CCBE1 <= -0.31', 0.001999930484002979)]
3 th class:  Bone
[('-0.50 < TFAP2C <= -0.46', -0.0027053187100308635),
 ('OR2J3 <= -0.18', -0.0026967034928143336),
 ('-0.51 < CEP55 <= 0.38', 0.0025692414491687472),
 ('-0.64 < PKMYT1 <= -0.28', 0.0025257063653955857),
 ('SLC22A10 > -0.11', -0.002414788832669028),
 ('ZNF577 <= -0.68', 0.0023084157778539113),
 ('POU3F4 <= -0.20', -0.002252001246883908),
 ('CCND2 <= -0.55', 0.0022164938230417857),
 ('KRBOX1 <= -0.40', 0.002162704566244542),
 ('TMEM40 <= -0.32', -0.0021476539486224937),
 ('-0.98 < CPNE3 <= -0.07', -0.0021134912944220418),
 ('PDLIM2 > 0.40', 0.0019772260063641183),
 ('VWDE <= -0.01', -0.0019726066615159106),
 ('PKDCC <= -0.43', -0.001960155771884798),
 ('RTP5 > -0.15', 0.0018551612001477945),
 ('-0.99 < MED20 <= 0.02', 0.0017379395791892885),
 ('CHMP4A > 0.28', 0.001716191749238385),
 ('PRAMEF11 <= -0.14', 0.0016190003963470487),
 ('LDLRAD2 > -0.04', -0.001265442301826374),
 ('-0.83 < CTSZ <= -0.19', 0.0010358383709256655)]
2 th class:  Bladder
[('ABCC5 <= -0.38', 0.001300386002497003),
 ('RANBP17 <= -0.64', 0.001260570111037361),
 ('GAGE2A <= -0.30', 0.0012156405925752701),
 ('IGKV1-8 > -0.15', -0.0012144705557428593),
 ('RBM44 > -0.08', 0.0011598010673624158),
 ('-0.15 < TG <= -0.15', -0.0011585336800395724),
 ('ZNF552 <= -0.33', 0.0010710551396370658),
 ('FOXN4 <= -0.24', 0.0010240607410330117),
 ('STRADA > 0.28', -0.0010042971591827674),
 ('-0.22 < NUDT16L1 <= 0.39', -0.0009682428139763953),
 ('CATSPERD > -0.10', -0.0008354924575231081),
 ('IGKV1D-43 <= -0.13', -0.0008014969329988765),
 ('-0.40 < SPRED3 <= -0.24', 0.000784346528031111),
 ('AC107081.1 <= -0.47', -0.000760745854800642),
 ('-0.70 < EEF2KMT <= -0.09', -0.0007425106550334006),
 ('-0.24 < SLC26A9 <= -0.20', -0.0007406797854383615),
 ('-1.13 < COX18 <= 0.03', 0.0006863716999632891),
 ('C6orf118 <= -0.17', 0.0006844694107652416),
 ('-0.24 < TRIM34 <= -0.17', 0.000621903114909788),
 ('HAND1 <= -0.11', -0.0006186930996327484)]
14 th class:  Lung
[('SOX9 <= -0.63', 0.006505288516582407),
 ('-0.36 < TNFSF12-TNFSF13 <= 0.08', 0.006293069885360369),
 ('-0.04 < EDEM3 <= -0.01', -0.005359559906162284),
 ('-1.05 < AGBL5 <= -0.05', -0.0052558420172318585),
 ('-0.76 < SDR42E1 <= -0.43', -0.005201637318926928),
 ('DNAJB13 > -0.17', 0.005190332529876444),
 ('-1.13 < GAK <= 0.07', -0.004640814055087734),
 ('-1.25 < VPS33B <= 0.15', 0.004594899176464881),
 ('TSPO2 > -0.07', -0.004409457292360504),
 ('GPR149 <= -0.12', -0.004363957095895997),
 ('-0.05 < ZBTB9 <= 0.02', -0.0040276348805386546),
 ('AHR > 0.27', 0.003983128212682191),
 ('APC <= -0.55', 0.003930622051096981),
 ('-0.07 < CLN3 <= 0.46', 0.003915884084279087),
 ('DCDC2C <= -0.37', 0.003904470738529279),
 ('-1.25 < INTS12 <= 0.09', 0.0037850550991007687),
 ('NAALADL2 <= -0.61', 0.0036926934636566846),
 ('PHKA1 <= -0.80', 0.003593199187068556),
 ('-1.11 < ADAR <= -0.01', 0.003204018256459331),
 ('ATP6V1F > 0.48', 0.0030141705292169303)]
Iteration Elapsed time: 580.9515285491943
897 th test sample
12 th class:  Leukemia
[('DHRS9 <= -0.25', 0.005398360765851465),
 ('SLC12A6 > -0.01', 0.005238359096816013),
 ('LACTB <= -0.67', 0.004918057758757132),
 ('ACO1 <= -0.84', 0.004903643134945179),
 ('STIM2 <= -0.82', 0.004747893305876651),
 ('CGB5 <= -0.04', 0.004554323017963651),
 ('IGLV3-19 <= -0.20', 0.004455319329090167),
 ('-0.55 < SLC45A1 <= -0.37', 0.0043571303928542785),
 ('SMAD3 > 0.16', 0.004251392413924962),
 ('CRYGN <= -0.18', 0.004083328413761079),
 ('CNOT4 <= -1.26', 0.003984162551733397),
 ('HEPHL1 > 0.11', 0.0038682817672662695),
 ('CFAP52 <= -0.19', 0.0035779252170343627),
 ('IL4R > 0.28', -0.0035440711659888223),
 ('TRAV4 <= -0.34', 0.0035167426664304137),
 ('ADGRA1 <= -0.27', -0.003171834027741152),
 ('C2CD4B <= -0.33', -0.00301447142835471),
 ('PTGES2 <= -1.06', 0.002928296949166892),
 ('NDUFAF3 <= -1.10', -0.002079344143191453),
 ('SNRPD3 <= -1.09', -0.001991345418368539)]
33 th class:  none
[('HHLA3 <= -0.68', 0.006069267406768817),
 ('CLIC1 <= -1.08', 0.005400083015478557),
 ('-0.32 < GRK1 <= -0.08', -0.005224651394024891),
 ('CPNE6 <= -0.15', -0.005122003929086052),
 ('NOMO2 <= -0.96', 0.005108226463017775),
 ('YIPF6 <= -1.19', 0.00509169668289521),
 ('CD58 <= -0.91', -0.004705107622115758),
 ('PATE1 <= -0.07', 0.004587088378674564),
 ('-0.42 < LSAMP <= 0.12', 0.00442640743987311),
 ('-0.28 < SFTA3 <= -0.21', -0.004406324870354723),
 ('-0.33 < TRIM33 <= -0.13', -0.004134188676497066),
 ('PRIM2 <= -0.48', -0.004102759588989071),
 ('CBLN1 <= -0.11', -0.00405750087195821),
 ('HEPACAM2 > -0.13', -0.004041061211262238),
 ('BICDL1 <= -0.47', 0.0035558975325791064),
 ('-0.61 < TIGD7 <= -0.19', -0.00333887626456693),
 ('FKBP1C <= -0.04', -0.003054408086325554),
 ('WIZ <= -0.40', 0.0025646375839938067),
 ('JMJD6 <= -1.00', -0.0025524068523573745),
 ('AK7 <= -0.42', 0.00254339370904921)]
15 th class:  Lymph
[('-0.17 < BTBD18 <= -0.15', -0.0015787602169683621),
 ('CEP68 <= -0.68', -0.0015746741179660211),
 ('-0.58 < PDXP <= -0.33', 0.0015143519006863158),
 ('SRSF6 <= -1.21', 0.0014762169319667998),
 ('CEP19 <= -0.43', 0.00141576764434739),
 ('TMEM121 <= -0.53', -0.0014153448863391646),
 ('WDR17 <= -0.46', -0.0013648604861005336),
 ('-0.29 < AVPI1 <= 0.17', 0.0012733292957678264),
 ('CX3CL1 <= -0.63', -0.0011922796003431462),
 ('ITLN2 <= -0.09', -0.0011847577740978239),
 ('GTF3C3 <= -1.18', 0.0011670128812945638),
 ('SCRT2 > -0.16', 0.001135340000576695),
 ('PIWIL3 <= -0.10', 0.0011247462494177831),
 ('OGFOD3 <= -1.16', 0.0010910333136261057),
 ('-0.32 < ASTN2 <= -0.27', 0.0010881355342904759),
 ('APOBEC1 <= -0.15', 0.001016518631135938),
 ('AC100821.1 > -0.01', 0.0010089165369703108),
 ('CFAP206 <= -0.47', 0.000901148038332889),
 ('HRH3 <= -0.17', -0.0006207172717916899),
 ('TRBV10-3 <= -0.15', -0.0005916943935921096)]
32 th class:  Uterus
[('RASSF2 <= -0.35', 0.010650274921459273),
 ('ZNF398 <= -0.96', -0.0089454691162852),
 ('MPL <= -0.15', -0.008737535682081481),
 ('-0.15 < IGKV2D-29 <= -0.05', -0.00806404496306515),
 ('DEFA1 > 0.02', 0.007605149407915771),
 ('PUM3 <= -0.92', 0.00697929236836841),
 ('ZBTB21 > 0.20', -0.006784980633004022),
 ('FSTL3 <= -0.49', 0.0067747954825439355),
 ('BCL2 <= -0.57', -0.006644949212860112),
 ('BEX1 <= -0.27', 0.006532805988810108),
 ('PACRG <= -0.28', -0.006352628884701354),
 ('-0.25 < CXCL11 <= -0.22', -0.006225562088927251),
 ('C6orf58 <= -0.05', 0.005915075122493382),
 ('TMX4 <= -0.76', -0.005759291298726176),
 ('TNFAIP6 <= -0.33', 0.005692033897268458),
 ('GJB3 <= -0.41', 0.005547797357736615),
 ('FUT7 > -0.11', 0.0055361522421080835),
 ('CLVS2 <= -0.18', -0.005271217304324432),
 ('ZG16 <= -0.08', 0.004800093992106611),
 ('RBP7 <= -0.43', 0.004618278157638184)]
11 th class:  Kidney
[('OR6B2 <= -0.16', -0.003704164682291963),
 ('TEAD2 <= -0.76', 0.002585095453545976),
 ('-0.33 < MTNR1A <= -0.17', -0.0024662235425372365),
 ('-1.10 < ZNF32 <= -0.05', -0.00236929294405162),
 ('PCBP4 <= -0.70', -0.0023336267136950827),
 ('SGSM2 <= -0.88', 0.0022718306254108315),
 ('C1D <= -1.11', -0.0021739046921299536),
 ('PAPLN <= -0.46', -0.002094566815552303),
 ('EBLN1 > 0.04', -0.0020935902001008354),
 ('DDX6 <= -1.01', -0.002017985851823484),
 ('MTIF3 <= -1.18', -0.0019543020691462675),
 ('RLBP1 <= -0.11', 0.0019173127101724077),
 ('BEND3 <= -0.44', 0.0018962625485678501),
 ('CLEC4M <= -0.09', 0.001881572502284941),
 ('GIMAP7 <= -0.61', 0.0018690295010787247),
 ('CCT3 <= -1.10', 0.0018235720900924553),
 ('FCGRT <= -0.73', 0.0018127869314499036),
 ('MED12 <= -1.01', 0.0015982677704003655),
 ('ACAP3 <= -0.55', 0.0015976499343271057),
 ('TBPL2 <= -0.06', 0.0013768866648953759)]
Iteration Elapsed time: 620.9794812202454
1071 th test sample
2 th class:  Bladder
[('0.08 < ZBED5 <= 0.66', 0.002883981826190614),
 ('RBFOX1 <= -0.27', 0.002593031629185171),
 ('-0.53 < IL1RAP <= -0.34', 0.002581928633242996),
 ('LPCAT3 > 0.39', 0.0025568734805556867),
 ('-1.14 < C1GALT1C1 <= 0.03', 0.0025231014696088664),
 ('-0.15 < MED26 <= 0.05', 0.0024762197303169666),
 ('0.06 < CRYZL1 <= 0.67', 0.002395934352468548),
 ('TMEM131 > 0.19', 0.002343673880434462),
 ('-0.84 < GPN3 <= 0.05', 0.0023251427705678557),
 ('-0.27 < GAB2 <= 0.15', 0.002307287824183588),
 ('-0.25 < KAAG1 <= -0.11', 0.002262531196680204),
 ('CYP4F2 > -0.17', 0.0022351378317948807),
 ('-0.24 < EPHB4 <= 0.44', 0.0022121949294721557),
 ('-0.16 < TCTN2 <= 0.49', 0.002187433599545714),
 ('KCTD6 <= -0.45', 0.0021783612443797493),
 ('-0.15 < CRLF2 <= -0.10', 0.0020142552286909177),
 ('-0.07 < SCAMP2 <= 0.54', 0.0019292148492827184),
 ('-0.09 < TMEM88 <= -0.08', 0.0019098447429457353),
 ('-0.16 < BTBD3 <= 0.34', 0.0017310729694204436),
 ('KRTAP19-3 <= -0.45', 0.0014679293392719224)]
14 th class:  Lung
[('ZNF812P > -0.15', 0.005936239416727491),
 ('AHR > 0.27', 0.005322644798745055),
 ('-0.33 < POMC <= -0.29', 0.004895271950632155),
 ('DNAI2 > -0.10', 0.004802162639575258),
 ('ATAD2B > 0.39', -0.004784639632491749),
 ('RAD21 > 0.43', -0.004702444090028963),
 ('PLTP > 0.03', 0.004420277792447754),
 ('-0.08 < TRIM25 <= 0.10', 0.004379731252275906),
 ('FSIP1 > -0.13', 0.004378088698757323),
 ('-0.12 < S100A12 <= -0.11', -0.004109869046455968),
 ('-0.44 < CDCP1 <= 0.38', 0.004076286891923345),
 ('PROCA1 > 0.13', 0.0040744245460724885),
 ('SLC8A1 > 0.07', -0.004069029522247535),
 ('NRAS > 0.56', 0.003962009228449768),
 ('-0.54 < GBP1 <= -0.32', 0.0038037734618663675),
 ('-0.17 < IGHM <= -0.11', -0.0037696175406478653),
 ('HS3ST1 > 0.01', 0.0035167082898915777),
 ('SYNGR2 > 0.49', 0.003283399074063343),
 ('RILPL2 <= -0.62', 0.003220545684956824),
 ('ZNF605 > 0.57', -0.002972055018225717)]
5 th class:  Breast
[('TESMIN > 0.08', 0.003993172309430042),
 ('-0.03 < FRA10AC1 <= 0.61', 0.003665154688481055),
 ('ERBB2 > 0.01', 0.003574127895235859),
 ('PDX1 > -0.15', 0.0034020367410904913),
 ('-0.13 < ZSWIM4 <= 0.30', 0.003271103148177862),
 ('-0.20 < OIT3 <= -0.14', -0.0032577724148167473),
 ('OSBPL11 > 0.29', -0.003219408812028365),
 ('OR8G5 > -0.09', 0.0032069216992227263),
 ('TERF2 > 0.61', 0.003168882612451787),
 ('CDK12 > 0.09', 0.0031632032966677106),
 ('RGS16 > 0.05', -0.003127615731627097),
 ('-0.14 < HTR6 <= -0.10', -0.00280442093005177),
 ('AMY2A <= -0.04', -0.0027434046157253198),
 ('-0.80 < NIPAL2 <= -0.25', -0.0025261588126592465),
 ('SIDT1 > 0.10', 0.0022690290908017868),
 ('FOXR1 <= -0.13', 0.002158939802923653),
 ('-0.26 < LRRN2 <= -0.23', 0.002137853367776343),
 ('EPHA1 > 0.01', 0.0020117706975761943),
 ('-0.35 < TLR7 <= 0.10', 0.002006635362193022),
 ('CCP110 > 0.39', 0.0019268533080645924)]
6 th class:  Cervix
[('C17orf64 > -0.23', -0.0012517551009360475),
 ('-0.27 < SEMA3G <= 0.04', 0.0011731196093141348),
 ('-0.34 < RASSF9 <= -0.27', 0.0011004639873291142),
 ('ST3GAL3 > 0.48', -0.0010823816972916327),
 ('OR56A1 <= -0.08', 0.0010669892129675721),
 ('-0.13 < RIT1 <= 0.33', -0.0010609445199630829),
 ('FLRT3 > -0.03', 0.001021688502718105),
 ('PHF13 > 0.60', 0.0009447970354089815),
 ('-1.03 < C6orf89 <= 0.06', 0.0009374448109748452),
 ('MCM3 > 0.46', -0.00092050701888731),
 ('DUSP18 > 0.39', 0.0008891027938886568),
 ('-0.18 < PI16 <= -0.14', -0.0008554527298276591),
 ('SPDYE3 > 0.41', -0.0008501158552327576),
 ('-0.10 < CLCN7 <= 0.21', 0.0007596421978642381),
 ('POU2F1 > 0.42', 0.0007559689408731548),
 ('-0.12 < HSPD1 <= 0.40', -0.0007190718223206009),
 ('-0.34 < GPRIN1 <= 0.11', -0.0006546790408696123),
 ('GART > 0.57', -0.0006272926823257195),
 ('AGTR2 <= -0.07', 0.0006122512666054526),
 ('-0.15 < GALR1 <= -0.13', 0.0005816331712584676)]
10 th class:  Head
[('OR5R1 <= -0.17', 0.00222423322551826),
 ('SLC2A8 > 0.36', 0.0021500431143380246),
 ('-0.43 < EFNB3 <= -0.25', -0.00210527266232282),
 ('MACROD2 > -0.04', -0.0021005030272087995),
 ('-0.11 < GTF2F2 <= 0.43', 0.0020274418009136),
 ('KIF13B <= -0.37', 0.0020243915648067877),
 ('-1.09 < CALM2 <= 0.01', -0.001879182003842192),
 ('GALNT12 > 0.31', 0.00187656773455306),
 ('-0.34 < NCR1 <= -0.31', -0.0017679204867730631),
 ('HIRIP3 > 0.50', 0.0017291271072981118),
 ('TAS2R40 <= -0.20', -0.0016755600940126939),
 ('NDUFB9 > 0.37', -0.0016248433753651293),
 ('CFAP126 <= -0.19', 0.0016067205128749503),
 ('HTR3C <= -0.05', -0.0015543933448178087),
 ('PRR22 <= -0.54', 0.001503646002336556),
 ('OR4A15 <= -0.53', 0.001363086706518395),
 ('PLD5 <= -0.37', 0.0013454656085339425),
 ('-0.33 < TSPAN12 <= 0.03', -0.0011972266222028721),
 ('PLCZ1 <= -0.15', 0.0010312831285167222),
 ('PF4V1 <= -0.29', 0.0008225102597446726)]
Iteration Elapsed time: 627.5231196880341
Total Elapsed time: 627.5237004756927

For this multi-class classification problem, we set the top_labels parameter so that LIME explains only the classes with the highest predicted probabilities (the top five for each test sample in the output above) rather than every class.

In [ ]:
# # %%prun
# start = time.time()
# i = np.random.randint(0, X_test.shape[0])
# print(i)
# exp = explainer.explain_instance(X_test[i], model.predict_proba, num_features=10, top_labels=5)
# print(time.time() - start)
In [ ]:
# exp.show_in_notebook(show_table=True, show_all=False)

feature1 ≤ X means that when this feature’s value satisfies this criterion, it supports class 0.
The floating-point numbers on the horizontal bars represent the relative importance of these features.

In [ ]:
# exp.show_in_notebook(show_table=True, show_all=True)
In [ ]:
# #for easier analysis and further processing
# for i in exp.available_labels():
#     print(label_encoder.classes_[i])
# #     display(pd.DataFrame(exp.as_list(label=i)))
#     display(exp.as_list(label=i))
In [ ]: